R Markdown
library(modvarsel)
##
## Attaching package: 'modvarsel'
## The following objects are masked from 'package:graphics':
##
## barplot, boxplot
library(mfe)
##
## Attaching package: 'mfe'
## The following object is masked from 'package:modvarsel':
##
## cv_bandwidth
## The following object is masked from 'package:stats':
##
## rf
data <- as.matrix(read.csv('mfe/data/indicateurs.csv', header = TRUE, sep = ';', dec = ','))
# #script R pour creer les modeles
# choiceModels <- list()
# PI = list(EI = 1:3,
# QC = 4:10,
# QN = 11:17,
# QS = 18:24)
#
# for (i in 1:24){
# if (i %in% PI$EI){
# res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
# }
#
# if (i %in% PI$EI){
# res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
# }
#
# if (i %in% PI$EI){
# res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
# }
#
# if (i %in% PI$EI){
# res <- choicemod(X = data[,-PI$EI], Y = data[, i], method = c("linreg", "sir", "rf", "plsr", "ridge"), N = 50, nperm = 100)
# }
# choiceModels[[i]] <- res
# save(choiceModels, file = '../data/choiceModels2.RData')
# }
#select and save the model in a list
set.seed(123)
load('mfe/data/choiceModelsFinal.RData')
models <- list()
for (i in 1:24){
#load modeles
Ylabel <- colnames(data)[i]
print(paste('Etude des modeles de prediction de:', Ylabel, sep = ' '))
head(choiceModels[[i]]$mse)
head(choiceModels[[i]]$mse_all)
head(choiceModels[[i]]$r2)
#selection of the best model on the R2
perf_moy <- apply(X = choiceModels[[i]]$mse, MARGIN = 2, FUN = mean, na.rm = TRUE)
sel_model <- which(perf_moy == min(perf_moy))
#plot the best method ----
#RMSE
boxplot(choiceModels[[i]], method = c("linreg", "sir", 'rf', "plsr", "ridge"),
type = "both", col = rgb(0,1,0,0.2), las = 2,
main = "N = 50 replications")
######plot the variables selected for each model ----
##linreg
#varaible selected
barplot(choiceModels[[i]], method="linreg", type="varsel", las = 2,
main = "linreg", col = rgb(1,1,0,0.2))
#total use of each variable in all the simulation
barplot(choiceModels[[i]], method="linreg", type="sizemod", las = 2,
main = "linreg", col = rgb(0,1,1,0.2))
##sir
#varaible selected
barplot(choiceModels[[i]], method="sir", type="varsel", las = 2,
main = "sir", col = rgb(1,1,0,0.2))
#total use of each variable in all the simulation
barplot(choiceModels[[i]], method="sir", type="sizemod", las = 2,
main = "sir", col = rgb(0,1,1,0.2))
##rf
#varaible selected
barplot(choiceModels[[i]], method="rf", type="varsel", las = 2,
main = "rf", col = rgb(1,1,0,0.2))
#total use of each variable in all the simulation
barplot(choiceModels[[i]], method="rf", type="sizemod", las = 2,
main = "rf", col = rgb(0,1,1,0.2))
##plsr
#varaible selected
barplot(choiceModels[[i]], method="plsr", type="varsel", las = 2,
main = "plsr", col = rgb(1,1,0,0.2))
#total use of each variable in all the simulation
barplot(choiceModels[[i]], method="plsr", type="sizemod", las = 2,
main = "plsr", col = rgb(0,1,1,0.2))
##ridge
#varaible selected
barplot(choiceModels[[i]], method="ridge", type="varsel", las = 2,
main = "ridge", col = rgb(1,1,0,0.2))
#total use of each variable in all the simulation
barplot(choiceModels[[i]], method="ridge", type="sizemod", las = 2,
main = "ridge", col = rgb(0,1,1,0.2))
#Selection of the variable with the importance
Y <- data[, i]
X <- data[,which(colnames(data) %in% rownames(choiceModels[[i]]$pvarsel))]
methode <- c('linreg', 'sir', 'rf', 'plsr', 'ridge')[sel_model]
print(paste('choix methode:', methode, sep = ' '))
#etude de l'importance des variables
imp <- varimportance(X = X, Y = Y, method = methode, nperm = 100)
meanvi <- colMeans(imp$mat_imp) # mean importance
knitr::kable(t(meanvi),digits=2)
knitr::kable(imp$base_imp, digit=2) # baseline value of importance
plot(imp,choice="boxplot", col="lightgreen")
plot(imp,choice="meanplot", cutoff = TRUE)
sel <- which(colnames(data) %in% select(imp, cutoff=TRUE)$var)
Xnew <- data[, sel]
#model final -----
if (methode == 'linreg'){
models[[i]] <- reg_lm(X = Xnew, Y = Y, Ylabel = Ylabel)
print(summary(models[[i]]))
#prediction of the input data
y_pred <- predict(models[[i]], Xnew)
}
if (methode == 'sir'){
models[[i]] <- sir(X = Xnew, Y = Y, Ylabel = Ylabel)
#visualize ebouli plot
plot(models[[i]], choice = 'eigenValue')
#visualize the performance of the non-parametric estimation
plot(models[[i]], choice = 'smoothing')
# Prediction of the input data
y_pred <- predict(models[[i]], Xnew)
}
if (methode == 'rf'){
models[[i]] <- rf(X = Xnew, Y = Y, Ylabel = Ylabel)
#prediction of the input data
y_pred <- predict(models[[i]], Xnew)
}
if (methode == 'ridge'){
models[[i]] <- ridge(X = Xnew, Y = Y, Ylabel = Ylabel)
print(summary(models[[i]]))
#prediction of the input data
y_pred <- predict(models[[i]], Xnew)
}
}
## [1] "Etude des modeles de prediction de: PV"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 99 -none- numeric
## cvm 99 -none- numeric
## cvsd 99 -none- numeric
## cvup 99 -none- numeric
## cvlo 99 -none- numeric
## nzero 99 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: GMQ"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 97 -none- numeric
## cvm 97 -none- numeric
## cvsd 97 -none- numeric
## cvup 97 -none- numeric
## cvlo 97 -none- numeric
## nzero 97 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 6 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 180 -none- numeric
## [1] "Etude des modeles de prediction de: Eff_alim"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 98 -none- numeric
## cvm 98 -none- numeric
## cvsd 98 -none- numeric
## cvup 98 -none- numeric
## cvlo 98 -none- numeric
## nzero 98 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 3 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 90 -none- numeric
## [1] "Etude des modeles de prediction de: col_C"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 98 -none- numeric
## cvm 98 -none- numeric
## cvsd 98 -none- numeric
## cvup 98 -none- numeric
## cvlo 98 -none- numeric
## nzero 98 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 7 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 210 -none- numeric
## [1] "Etude des modeles de prediction de: pH"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 99 -none- numeric
## cvm 99 -none- numeric
## cvsd 99 -none- numeric
## cvup 99 -none- numeric
## cvlo 99 -none- numeric
## nzero 99 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: Qte_gras"











## [1] "choix methode: rf"

## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.

## [1] "Etude des modeles de prediction de: Prop_gras"











## [1] "choix methode: rf"


## [1] "Etude des modeles de prediction de: poids_carc"











## [1] "choix methode: linreg"


##
## Call:
## lm(formula = Y ~ ., data = as.data.frame(X))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.8952 -4.9275 -0.5383 4.6608 13.4423
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.833e-09 1.316e+00 0.000 1e+00
## PV 6.095e-01 2.374e-02 25.673 <2e-16 ***
## GMQ -1.996e+01 4.643e+00 -4.299 2e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.207 on 27 degrees of freedom
## Multiple R-squared: 0.9621, Adjusted R-squared: 0.9592
## F-statistic: 342.3 on 2 and 27 DF, p-value: < 2.2e-16
##
## [1] "Etude des modeles de prediction de: confo"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 97 -none- numeric
## cvm 97 -none- numeric
## cvsd 97 -none- numeric
## cvup 97 -none- numeric
## cvlo 97 -none- numeric
## nzero 97 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 7 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 210 -none- numeric
## [1] "Etude des modeles de prediction de: col_lumi"











## [1] "choix methode: rf"


## [1] "Etude des modeles de prediction de: ratio_C160_C180"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 93 -none- numeric
## cvm 93 -none- numeric
## cvsd 93 -none- numeric
## cvup 93 -none- numeric
## cvlo 93 -none- numeric
## nzero 93 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 6 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 180 -none- numeric
## [1] "Etude des modeles de prediction de: lipide_tnr"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 98 -none- numeric
## cvm 98 -none- numeric
## cvsd 98 -none- numeric
## cvup 98 -none- numeric
## cvlo 98 -none- numeric
## nzero 98 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 16 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 480 -none- numeric
## [1] "Etude des modeles de prediction de: AGMI_vs_AGPI"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 98 -none- numeric
## cvm 98 -none- numeric
## cvsd 98 -none- numeric
## cvup 98 -none- numeric
## cvlo 98 -none- numeric
## nzero 98 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 16 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 480 -none- numeric
## [1] "Etude des modeles de prediction de: AGL"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 96 -none- numeric
## cvm 96 -none- numeric
## cvsd 96 -none- numeric
## cvup 96 -none- numeric
## cvlo 96 -none- numeric
## nzero 96 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: AGtr"











## [1] "choix methode: sir"




## [1] "Etude des modeles de prediction de: n6_sur_n3"











## [1] "choix methode: rf"


## [1] "Etude des modeles de prediction de: CLA"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 98 -none- numeric
## cvm 98 -none- numeric
## cvsd 98 -none- numeric
## cvup 98 -none- numeric
## cvlo 98 -none- numeric
## nzero 98 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 4 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 120 -none- numeric
## [1] "Etude des modeles de prediction de: tendrete"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 91 -none- numeric
## cvm 91 -none- numeric
## cvsd 91 -none- numeric
## cvup 91 -none- numeric
## cvlo 91 -none- numeric
## nzero 91 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: jutosite"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 97 -none- numeric
## cvm 97 -none- numeric
## cvsd 97 -none- numeric
## cvup 97 -none- numeric
## cvlo 97 -none- numeric
## nzero 97 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 3 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 90 -none- numeric
## [1] "Etude des modeles de prediction de: amere"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 89 -none- numeric
## cvm 89 -none- numeric
## cvsd 89 -none- numeric
## cvup 89 -none- numeric
## cvlo 89 -none- numeric
## nzero 89 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: rance_poisson"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 73 -none- numeric
## cvm 73 -none- numeric
## cvsd 73 -none- numeric
## cvup 73 -none- numeric
## cvlo 73 -none- numeric
## nzero 73 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: gras_vs_metal"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 86 -none- numeric
## cvm 86 -none- numeric
## cvsd 86 -none- numeric
## cvup 86 -none- numeric
## cvlo 86 -none- numeric
## nzero 86 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 5 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 150 -none- numeric
## [1] "Etude des modeles de prediction de: sang_acide"











## [1] "choix methode: ridge"


## Length Class Mode
## lambda 94 -none- numeric
## cvm 94 -none- numeric
## cvsd 94 -none- numeric
## cvup 94 -none- numeric
## cvlo 94 -none- numeric
## nzero 94 -none- numeric
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## Xlabels 2 -none- character
## Ylabel 1 -none- character
## y_train 30 -none- numeric
## x_train 60 -none- numeric
## [1] "Etude des modeles de prediction de: intense_flav_sucre"











## [1] "choix methode: rf"


# save(models, file = '../data/models2.RData')
#add under models
for (i in 1:length(models)){
if (class(models[[i]]) == 'reg_lm') models[[i]]$all_models <- underModel.reg_lm(models[[i]], B = 100)
if (class(models[[i]]) == 'rf') models[[i]]$all_models <- underModel.rf(models[[i]], B = 100)
if (class(models[[i]]) == 'sir') models[[i]]$all_models <- underModel.sir(models[[i]], B = 100)
}
save(models, file = 'mfe/data/modelsFifi.RData')